--- a/src/gallium/drivers/nouveau/nouveau_buffer.c
+++ b/src/gallium/drivers/nouveau/nouveau_buffer.c
@@ -380,6 +380,7 @@
                             struct pipe_transfer **ptransfer)
 {
    struct nouveau_context *nv = nouveau_context(pipe);
+   struct nouveau_screen *screen = nv->screen;
    struct nv04_resource *buf = nv04_resource(resource);
    struct nouveau_transfer *tx = MALLOC_STRUCT(nouveau_transfer);
    uint8_t *map;
@@ -427,14 +428,19 @@
                buf->data = NULL;
             }
             nouveau_transfer_staging(nv, tx, false);
+            pipe_mutex_lock(screen->push_mutex);
             nouveau_transfer_read(nv, tx);
+            pipe_mutex_unlock(screen->push_mutex);
          } else {
             /* The buffer is currently idle. Create a staging area for writes,
              * and make sure that the cached data is up-to-date. */
             if (usage & PIPE_TRANSFER_WRITE)
                nouveau_transfer_staging(nv, tx, true);
-            if (!buf->data)
+            if (!buf->data) {
+               pipe_mutex_lock(screen->push_mutex);
                nouveau_buffer_cache(nv, buf);
+               pipe_mutex_unlock(screen->push_mutex);
+            }
          }
       }
       return buf->data ? (buf->data + box->x) : tx->map;
@@ -479,7 +485,9 @@
       if (unlikely(usage & PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE)) {
          /* Discarding was not possible, must sync because
           * subsequent transfers might use UNSYNCHRONIZED. */
+         pipe_mutex_lock(screen->push_mutex);
          nouveau_buffer_sync(nv, buf, usage & PIPE_TRANSFER_READ_WRITE);
+         pipe_mutex_unlock(screen->push_mutex);
       } else
       if (usage & PIPE_TRANSFER_DISCARD_RANGE) {
          /* The whole range is being discarded, so it doesn't matter what was
@@ -488,10 +496,13 @@
          map = tx->map;
       } else
       if (nouveau_buffer_busy(buf, PIPE_TRANSFER_READ)) {
-         if (usage & PIPE_TRANSFER_DONTBLOCK)
+         if (usage & PIPE_TRANSFER_DONTBLOCK) {
             map = NULL;
-         else
+         } else {
+            pipe_mutex_lock(screen->push_mutex);
             nouveau_buffer_sync(nv, buf, usage & PIPE_TRANSFER_READ_WRITE);
+            pipe_mutex_unlock(screen->push_mutex);
+         }
       } else {
          /* It is expected that the returned buffer be a representation of the
           * data in question, so we must copy it over from the buffer. */
@@ -515,9 +526,13 @@
 {
    struct nouveau_transfer *tx = nouveau_transfer(transfer);
    struct nv04_resource *buf = nv04_resource(transfer->resource);
-
-   if (tx->map)
+   struct nouveau_screen *screen = nouveau_context(pipe)->screen;
+
+   if (tx->map) {
+      pipe_mutex_lock(screen->push_mutex);
       nouveau_transfer_write(nouveau_context(pipe), tx, box->x, box->width);
+      pipe_mutex_unlock(screen->push_mutex);
+   }
 
    util_range_add(&buf->valid_buffer_range,
                   tx->base.box.x + box->x,
@@ -537,11 +552,15 @@
    struct nouveau_context *nv = nouveau_context(pipe);
    struct nouveau_transfer *tx = nouveau_transfer(transfer);
    struct nv04_resource *buf = nv04_resource(transfer->resource);
+   struct nouveau_screen *screen = nouveau_context(pipe)->screen;
 
    if (tx->base.usage & PIPE_TRANSFER_WRITE) {
       if (!(tx->base.usage & PIPE_TRANSFER_FLUSH_EXPLICIT)) {
-         if (tx->map)
+         if (tx->map) {
+            pipe_mutex_lock(screen->push_mutex);
             nouveau_transfer_write(nv, tx, 0, tx->base.box.width);
+            pipe_mutex_unlock(screen->push_mutex);
+         }
 
          util_range_add(&buf->valid_buffer_range,
                         tx->base.box.x, tx->base.box.x + tx->base.box.width);
--- a/src/gallium/drivers/nouveau/nouveau_fence.c
+++ b/src/gallium/drivers/nouveau/nouveau_fence.c
@@ -71,12 +71,14 @@
 
    ++fence->ref;
 
+   pipe_mutex_lock(screen->fence.list_mutex);
    if (screen->fence.tail)
       screen->fence.tail->next = fence;
    else
       screen->fence.head = fence;
 
    screen->fence.tail = fence;
+   pipe_mutex_unlock(screen->fence.list_mutex);
 
    screen->fence.emit(&screen->base, &fence->sequence);
 
@@ -90,6 +92,9 @@
    struct nouveau_fence *it;
    struct nouveau_screen *screen = fence->screen;
 
+   /* XXX This can race against fence_update. But fence_update can also call
+    * into this, so ... be have to be careful.
+    */
    if (fence->state == NOUVEAU_FENCE_STATE_EMITTED ||
        fence->state == NOUVEAU_FENCE_STATE_FLUSHED) {
       if (fence == screen->fence.head) {
@@ -123,6 +128,7 @@
       return;
    screen->fence.sequence_ack = sequence;
 
+   pipe_mutex_lock(screen->fence.list_mutex);
    for (fence = screen->fence.head; fence; fence = next) {
       next = fence->next;
       sequence = fence->sequence;
@@ -144,6 +150,7 @@
          if (fence->state == NOUVEAU_FENCE_STATE_EMITTED)
             fence->state = NOUVEAU_FENCE_STATE_FLUSHED;
    }
+   pipe_mutex_unlock(screen->fence.list_mutex);
 }
 
 #define NOUVEAU_FENCE_MAX_SPINS (1 << 31)
@@ -198,11 +205,19 @@
    uint32_t spins = 0;
    int64_t start = 0;
 
+   /* Fast-path for the case where the fence is already signaled to avoid
+    * messing around with mutexes and timing.
+    */
+   if (fence->state == NOUVEAU_FENCE_STATE_SIGNALLED)
+      return true;
+
    if (debug && debug->debug_message)
       start = os_time_get_nano();
 
    if (!nouveau_fence_kick(fence))
       return false;
+
+   pipe_mutex_unlock(screen->push_mutex);
 
    do {
       if (fence->state == NOUVEAU_FENCE_STATE_SIGNALLED) {
@@ -210,6 +225,7 @@
             pipe_debug_message(debug, PERF_INFO,
                                "stalled %.3f ms waiting for fence",
                                (os_time_get_nano() - start) / 1000000.f);
+         pipe_mutex_lock(screen->push_mutex);
          return true;
       }
       if (!spins)
@@ -226,6 +242,8 @@
    debug_printf("Wait on fence %u (ack = %u, next = %u) timed out !\n",
                 fence->sequence,
                 screen->fence.sequence_ack, screen->fence.sequence);
+
+   pipe_mutex_lock(screen->push_mutex);
 
    return false;
 }


--- a/src/gallium/drivers/nouveau/nouveau_fence.h
+++ b/src/gallium/drivers/nouveau/nouveau_fence.h
@@ -2,6 +2,7 @@
 #ifndef __NOUVEAU_FENCE_H__
 #define __NOUVEAU_FENCE_H__
 
+#include "util/u_atomic.h"
 #include "util/u_inlines.h"
 #include "util/list.h"
 
@@ -47,10 +48,10 @@
 nouveau_fence_ref(struct nouveau_fence *fence, struct nouveau_fence **ref)
 {
    if (fence)
-      ++fence->ref;
+      p_atomic_inc(&fence->ref);
 
    if (*ref) {
-      if (--(*ref)->ref == 0)
+      if (p_atomic_dec_zero(&(*ref)->ref))
          nouveau_fence_del(*ref);
    }
 

src/gallium/drivers/nouveau/nouveau_screen.c
Diff
Switch to side-by-side view

--- a/src/gallium/drivers/nouveau/nouveau_screen.c
+++ b/src/gallium/drivers/nouveau/nouveau_screen.c
@@ -74,10 +74,14 @@
                             struct pipe_fence_handle *pfence,
                             uint64_t timeout)
 {
+   bool ret;
    if (!timeout)
       return nouveau_fence_signalled(nouveau_fence(pfence));
 
-   return nouveau_fence_wait(nouveau_fence(pfence), NULL);
+   pipe_mutex_lock(nouveau_screen(screen)->push_mutex);
+   ret = nouveau_fence_wait(nouveau_fence(pfence), NULL);
+   pipe_mutex_unlock(nouveau_screen(screen)->push_mutex);
+   return ret;
 }
 
 
@@ -153,6 +157,9 @@
    char *nv_dbg = getenv("NOUVEAU_MESA_DEBUG");
    if (nv_dbg)
       nouveau_mesa_debug = atoi(nv_dbg);
+
+   pipe_mutex_init(screen->push_mutex);
+   pipe_mutex_init(screen->fence.list_mutex);
 
    /* These must be set before any failure is possible, as the cleanup
     * paths assume they're responsible for deleting them.
@@ -254,6 +261,9 @@
    nouveau_device_del(&screen->device);
    nouveau_drm_del(&screen->drm);
    close(fd);
+
+   pipe_mutex_destroy(screen->push_mutex);
+   pipe_mutex_destroy(screen->fence.list_mutex);
 }
 
 static void
--- a/src/gallium/drivers/nouveau/nouveau_screen.h
+++ b/src/gallium/drivers/nouveau/nouveau_screen.h
@@ -3,6 +3,7 @@
 
 #include "pipe/p_screen.h"
 #include "util/u_memory.h"
+#include "os/os_thread.h"
 
 #ifdef DEBUG
 # define NOUVEAU_ENABLE_DRIVER_STATISTICS
@@ -22,6 +23,7 @@
    struct nouveau_object *channel;
    struct nouveau_client *client;
    struct nouveau_pushbuf *pushbuf;
+   pipe_mutex push_mutex;
 
    int refcount;
 
@@ -39,6 +41,7 @@
       struct nouveau_fence *head;
       struct nouveau_fence *tail;
       struct nouveau_fence *current;
+      pipe_mutex list_mutex;
       u32 sequence;
       u32 sequence_ack;
       void (*emit)(struct pipe_screen *, u32 *sequence);
--- a/src/gallium/drivers/nouveau/nv30/nv30_clear.c
+++ b/src/gallium/drivers/nouveau/nv30/nv30_clear.c
@@ -58,8 +58,11 @@
    struct pipe_framebuffer_state *fb = &nv30->framebuffer;
    uint32_t colr = 0, zeta = 0, mode = 0;
 
-   if (!nv30_state_validate(nv30, NV30_NEW_FRAMEBUFFER | NV30_NEW_SCISSOR, true))
+   pipe_mutex_lock(nv30->screen->base.push_mutex);
+   if (!nv30_state_validate(nv30, NV30_NEW_FRAMEBUFFER | NV30_NEW_SCISSOR, true)) {
+      pipe_mutex_unlock(nv30->screen->base.push_mutex);
       return;
+   }
 
    if (buffers & PIPE_CLEAR_COLOR && fb->nr_cbufs) {
       colr  = pack_rgba(fb->cbufs[0]->format, color->f);
@@ -96,6 +99,7 @@
    PUSH_DATA (push, mode);
 
    nv30_state_release(nv30);
+   pipe_mutex_unlock(nv30->screen->base.push_mutex);
 }
 
 static void
@@ -126,11 +130,15 @@
       rt_format |= NV30_3D_RT_FORMAT_TYPE_LINEAR;
    }
 
+   pipe_mutex_lock(nv30->screen->base.push_mutex);
+
    refn.bo = mt->base.bo;
    refn.flags = NOUVEAU_BO_VRAM | NOUVEAU_BO_WR;
    if (nouveau_pushbuf_space(push, 32, 1, 0) ||
-       nouveau_pushbuf_refn (push, &refn, 1))
+       nouveau_pushbuf_refn (push, &refn, 1)) {
+      pipe_mutex_unlock(nv30->screen->base.push_mutex);
       return;
+   }
 
    BEGIN_NV04(push, NV30_3D(RT_ENABLE), 1);
    PUSH_DATA (push, NV30_3D_RT_ENABLE_COLOR0);
@@ -155,6 +163,8 @@
                     NV30_3D_CLEAR_BUFFERS_COLOR_B |
                     NV30_3D_CLEAR_BUFFERS_COLOR_A);
 
+   pipe_mutex_unlock(nv30->screen->base.push_mutex);
+
    nv30->dirty |= NV30_NEW_FRAMEBUFFER | NV30_NEW_SCISSOR;
 }
 
@@ -191,11 +201,15 @@
    if (buffers & PIPE_CLEAR_STENCIL)
       mode |= NV30_3D_CLEAR_BUFFERS_STENCIL;
 
+   pipe_mutex_lock(nv30->screen->base.push_mutex);
+
    refn.bo = mt->base.bo;
    refn.flags = NOUVEAU_BO_VRAM | NOUVEAU_BO_WR;
    if (nouveau_pushbuf_space(push, 32, 1, 0) ||
-       nouveau_pushbuf_refn (push, &refn, 1))
+       nouveau_pushbuf_refn (push, &refn, 1)) {
+      pipe_mutex_unlock(nv30->screen->base.push_mutex);
       return;
+   }
 
    BEGIN_NV04(push, NV30_3D(RT_ENABLE), 1);
    PUSH_DATA (push, 0);
@@ -221,6 +235,8 @@
    BEGIN_NV04(push, NV30_3D(CLEAR_BUFFERS), 1);
    PUSH_DATA (push, mode);
 
+   pipe_mutex_unlock(nv30->screen->base.push_mutex);
+
    nv30->dirty |= NV30_NEW_FRAMEBUFFER | NV30_NEW_SCISSOR;
 }
 
--- a/src/gallium/drivers/nouveau/nv30/nv30_context.c
+++ b/src/gallium/drivers/nouveau/nv30/nv30_context.c
@@ -201,6 +201,8 @@
    if (!nv30)
       return NULL;
 
+   pipe_mutex_lock(screen->base.push_mutex);
+
    nv30->screen = screen;
    nv30->base.screen = &screen->base;
    nv30->base.copy_data = nv30_transfer_copy_data;
@@ -226,6 +228,7 @@
    ret = nouveau_bufctx_new(nv30->base.client, 64, &nv30->bufctx);
    if (ret) {
       nv30_context_destroy(pipe);
+      pipe_mutex_unlock(screen->base.push_mutex);
       return NULL;
    }
 
@@ -259,10 +262,13 @@
    nv30->blitter = util_blitter_create(pipe);
    if (!nv30->blitter) {
       nv30_context_destroy(pipe);
+      pipe_mutex_unlock(screen->base.push_mutex);
       return NULL;
    }
 
    nouveau_context_init_vdec(&nv30->base);
 
+   pipe_mutex_unlock(screen->base.push_mutex);
+
    return pipe;
 }
--- a/src/gallium/drivers/nouveau/nv30/nv30_miptree.c
+++ b/src/gallium/drivers/nouveau/nv30/nv30_miptree.c
@@ -130,10 +130,12 @@
    struct nv30_context *nv30 = nv30_context(pipe);
    struct nv30_rect src, dst;
 
+   pipe_mutex_lock(nv30->screen->base.push_mutex);
    if (dstres->target == PIPE_BUFFER && srcres->target == PIPE_BUFFER) {
       nouveau_copy_buffer(&nv30->base,
                           nv04_resource(dstres), dstx,
                           nv04_resource(srcres), src_box->x, src_box->width);
+      pipe_mutex_unlock(nv30->screen->base.push_mutex);
       return;
    }
 
@@ -143,6 +145,7 @@
                        src_box->width, src_box->height, &dst);
 
    nv30_transfer_rect(nv30, NEAREST, &src, &dst);
+   pipe_mutex_unlock(nv30->screen->base.push_mutex);
 }
 
 static void
@@ -163,6 +166,7 @@
    y1 = src.y1;
 
    /* On nv3x we must use sifm which is restricted to 1024x1024 tiles */
+   pipe_mutex_lock(nv30->screen->base.push_mutex);
    for (y = src.y0; y < y1; y += h) {
       h = y1 - y;
       if (h > 1024)
@@ -193,6 +197,7 @@
          nv30_transfer_rect(nv30, BILINEAR, &src, &dst);
       }
    }
+   pipe_mutex_unlock(nv30->screen->base.push_mutex);
 }
 
 void
@@ -308,8 +313,12 @@
    tx->tmp.y1     = tx->tmp.h;
    tx->tmp.z      = 0;
 
-   if (usage & PIPE_TRANSFER_READ)
+   if (usage & PIPE_TRANSFER_READ) {
+      pipe_mutex_lock(nv30->screen->base.push_mutex);
       nv30_transfer_rect(nv30, NEAREST, &tx->img, &tx->tmp);
+      PUSH_KICK(nv30->base.pushbuf);
+      pipe_mutex_unlock(nv30->screen->base.push_mutex);
+   }
 
    if (tx->tmp.bo->map) {
       *ptransfer = &tx->base;
@@ -340,11 +349,13 @@
    struct nv30_transfer *tx = nv30_transfer(ptx);
 
    if (ptx->usage & PIPE_TRANSFER_WRITE) {
+      pipe_mutex_lock(nv30->screen->base.push_mutex);
       nv30_transfer_rect(nv30, NEAREST, &tx->tmp, &tx->img);
 
       /* Allow the copies above to finish executing before freeing the source */
       nouveau_fence_work(nv30->screen->base.fence.current,
                          nouveau_fence_unref_bo, tx->tmp.bo);
+      pipe_mutex_unlock(nv30->screen->base.push_mutex);
    } else {
       nouveau_bo_ref(NULL, &tx->tmp.bo);
    }
--- a/src/gallium/drivers/nouveau/nv30/nv30_query.c
+++ b/src/gallium/drivers/nouveau/nv30/nv30_query.c
@@ -152,6 +152,7 @@
    struct nv30_query *q = nv30_query(pq);
    struct nouveau_pushbuf *push = nv30->base.pushbuf;
 
+   pipe_mutex_lock(nv30->screen->base.push_mutex);
    switch (q->type) {
    case PIPE_QUERY_TIME_ELAPSED:
       q->qo[0] = nv30_query_object_new(nv30->screen);
@@ -161,7 +162,7 @@
       }
       break;
    case PIPE_QUERY_TIMESTAMP:
-      return true;
+      break;
    default:
       BEGIN_NV04(push, NV30_3D(QUERY_RESET), 1);
       PUSH_DATA (push, q->report);
@@ -172,6 +173,7 @@
       BEGIN_NV04(push, SUBC_3D(q->enable), 1);
       PUSH_DATA (push, 1);
    }
+   pipe_mutex_unlock(nv30->screen->base.push_mutex);
    return true;
 }
 
@@ -183,6 +185,7 @@
    struct nv30_query *q = nv30_query(pq);
    struct nouveau_pushbuf *push = nv30->base.pushbuf;
 
+   pipe_mutex_lock(nv30->screen->base.push_mutex);
    q->qo[1] = nv30_query_object_new(screen);
    if (q->qo[1]) {
       BEGIN_NV04(push, NV30_3D(QUERY_GET), 1);
@@ -194,6 +197,7 @@
       PUSH_DATA (push, 0);
    }
    PUSH_KICK (push);
+   pipe_mutex_unlock(nv30->screen->base.push_mutex);
    return true;
 }
 
@@ -248,9 +252,11 @@
    nv30->render_cond_mode = mode;
    nv30->render_cond_cond = condition;
 
+   pipe_mutex_lock(nv30->screen->base.push_mutex);
    if (!pq) {
       BEGIN_NV04(push, SUBC_3D(0x1e98), 1);
       PUSH_DATA (push, 0x01000000);
+      pipe_mutex_unlock(nv30->screen->base.push_mutex);
       return;
    }
 
@@ -262,6 +268,7 @@
 
    BEGIN_NV04(push, SUBC_3D(0x1e98), 1);
    PUSH_DATA (push, 0x02000000 | q->qo[1]->hw->start);
+   pipe_mutex_unlock(nv30->screen->base.push_mutex);
 }
 
 static void
--- a/src/gallium/drivers/nouveau/nv30/nv30_vbo.c
+++ b/src/gallium/drivers/nouveau/nv30/nv30_vbo.c
@@ -563,6 +563,8 @@
    if (nv30->vbo_push_hint != !!nv30->vbo_fifo)
       nv30->dirty |= NV30_NEW_ARRAYS;
 
+   pipe_mutex_lock(nv30->screen->base.push_mutex);
+
    push->user_priv = &nv30->bufctx;
    if (nv30->vbo_user && !(nv30->dirty & (NV30_NEW_VERTEX | NV30_NEW_ARRAYS)))
       nv30_update_user_vbufs(nv30);
@@ -570,10 +572,12 @@
    nv30_state_validate(nv30, ~0, true);
    if (nv30->draw_flags) {
       nv30_render_vbo(pipe, info);
+      pipe_mutex_unlock(nv30->screen->base.push_mutex);
       return;
    } else
    if (nv30->vbo_fifo) {
       nv30_push_vbo(nv30, info);
+      pipe_mutex_unlock(nv30->screen->base.push_mutex);
       return;
    }
 
@@ -630,6 +634,7 @@
 
    nv30_state_release(nv30);
    nv30_release_user_vbufs(nv30);
+   pipe_mutex_unlock(nv30->screen->base.push_mutex);
 }
 
 void
--- a/src/gallium/drivers/nouveau/nv50/nv50_compute.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_compute.c
@@ -249,9 +249,11 @@
    struct nv50_program *cp = nv50->compprog;
    bool ret;
 
+   pipe_mutex_lock(nv50->screen->base.push_mutex);
    ret = !nv50_state_validate_cp(nv50, ~0);
    if (ret) {
       NOUVEAU_ERR("Failed to launch grid !\n");
+      pipe_mutex_unlock(nv50->screen->base.push_mutex);
       return;
    }
 
@@ -284,6 +286,8 @@
    BEGIN_NV04(push, SUBC_CP(NV50_GRAPH_SERIALIZE), 1);
    PUSH_DATA (push, 0);
 
+   pipe_mutex_unlock(nv50->screen->base.push_mutex);
+
    /* bind a compute shader clobbers fragment shader state */
    nv50->dirty_3d |= NV50_NEW_3D_FRAGPROG;
 }


--- a/src/gallium/drivers/nouveau/nv50/nv50_context.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_context.c
@@ -37,7 +37,9 @@
    if (fence)
       nouveau_fence_ref(screen->fence.current, (struct nouveau_fence **)fence);
 
+   pipe_mutex_lock(screen->push_mutex);
    PUSH_KICK(screen->pushbuf);
+   pipe_mutex_unlock(screen->push_mutex);
 
    nouveau_context_update_frame_stats(nouveau_context(pipe));
 }
@@ -47,10 +49,12 @@
 {
    struct nouveau_pushbuf *push = nv50_context(pipe)->base.pushbuf;
 
+   pipe_mutex_lock(nouveau_context(pipe)->screen->push_mutex);
    BEGIN_NV04(push, SUBC_3D(NV50_GRAPH_SERIALIZE), 1);
    PUSH_DATA (push, 0);
    BEGIN_NV04(push, NV50_3D(TEX_CACHE_CTL), 1);
    PUSH_DATA (push, 0x20);
+   pipe_mutex_unlock(nouveau_context(pipe)->screen->push_mutex);
 }
 
 static void
@@ -107,6 +111,7 @@
       data_words = string_words;
    else
       data_words = string_words + !!(len & 3);
+   pipe_mutex_lock(nouveau_context(pipe)->screen->push_mutex);
    BEGIN_NI04(push, SUBC_3D(NV04_GRAPH_NOP), data_words);
    if (string_words)
       PUSH_DATAp(push, str, string_words);
@@ -115,6 +120,7 @@
       memcpy(&data, &str[string_words * 4], len & 3);
       PUSH_DATA (push, data);
    }
+   pipe_mutex_unlock(nouveau_context(pipe)->screen->push_mutex);
 }
 
 void
@@ -290,6 +296,8 @@
    if (!nv50)
       return NULL;
    pipe = &nv50->base.pipe;
+
+   pipe_mutex_lock(screen->base.push_mutex);
 
    if (!nv50_blitctx_create(nv50))
       goto out_err;
@@ -384,9 +392,12 @@
 
    util_dynarray_init(&nv50->global_residents);
 
+   pipe_mutex_unlock(screen->base.push_mutex);
+
    return pipe;
 
 out_err:
+   pipe_mutex_unlock(screen->base.push_mutex);
    if (nv50->bufctx_3d)
       nouveau_bufctx_del(&nv50->bufctx_3d);
    if (nv50->bufctx_cp)
--- a/src/gallium/drivers/nouveau/nv50/nv50_context.h
+++ b/src/gallium/drivers/nouveau/nv50/nv50_context.h
@@ -222,6 +222,11 @@
 /* nv50_draw.c */
 extern struct draw_stage *nv50_draw_render_stage(struct nv50_context *);
 
+/* nv50_query.c */
+void nv50_render_condition(struct pipe_context *pipe,
+                           struct pipe_query *pq,
+                           boolean condition, uint mode);
+
 /* nv50_shader_state.c */
 void nv50_vertprog_validate(struct nv50_context *);
 void nv50_gmtyprog_validate(struct nv50_context *);
--- a/src/gallium/drivers/nouveau/nv50/nv50_query.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_query.c
@@ -70,7 +70,7 @@
    return q->funcs->get_query_result(nv50_context(pipe), q, wait, result);
 }
 
-static void
+void
 nv50_render_condition(struct pipe_context *pipe,
                       struct pipe_query *pq,
                       boolean condition, uint mode)
@@ -145,6 +145,16 @@
 }
 
 static void
+nv50_render_condition_locked(struct pipe_context *pipe,
+                             struct pipe_query *pq,
+                             boolean condition, uint mode)
+{
+   pipe_mutex_lock(nouveau_context(pipe)->screen->push_mutex);
+   nv50_render_condition(pipe, pq, condition, mode);
+   pipe_mutex_unlock(nouveau_context(pipe)->screen->push_mutex);
+}
+
+static void
 nv50_set_active_query_state(struct pipe_context *pipe, boolean enable)
 {
 }
@@ -160,7 +170,7 @@
    pipe->end_query = nv50_end_query;
    pipe->get_query_result = nv50_get_query_result;
    pipe->set_active_query_state = nv50_set_active_query_state;
-   pipe->render_condition = nv50_render_condition;
+   pipe->render_condition = nv50_render_condition_locked;
    nv50->cond_condmode = NV50_3D_COND_MODE_ALWAYS;
 }
 
--- a/src/gallium/drivers/nouveau/nv50/nv50_query_hw.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_query_hw.c
@@ -129,6 +129,7 @@
 {
    struct nouveau_pushbuf *push = nv50->base.pushbuf;
    struct nv50_hw_query *hq = nv50_hw_query(q);
+   bool ret = true;
 
    if (hq->funcs && hq->funcs->begin_query)
       return hq->funcs->begin_query(nv50, hq);
@@ -154,6 +155,7 @@
    if (!hq->is64bit)
       hq->data[0] = hq->sequence++; /* the previously used one */
 
+   pipe_mutex_lock(nv50->screen->base.push_mutex);
    switch (q->type) {
    case PIPE_QUERY_OCCLUSION_COUNTER:
    case PIPE_QUERY_OCCLUSION_PREDICATE:
@@ -193,10 +195,13 @@
       break;
    default:
       assert(0);
-      return false;
-   }
-   hq->state = NV50_HW_QUERY_STATE_ACTIVE;
-   return true;
+      ret = false;
+      break;
+   }
+   pipe_mutex_unlock(nv50->screen->base.push_mutex);
+   if (ret)
+      hq->state = NV50_HW_QUERY_STATE_ACTIVE;
+   return ret;
 }
 
 static void
@@ -212,6 +217,7 @@
 
    hq->state = NV50_HW_QUERY_STATE_ENDED;
 
+   pipe_mutex_lock(nv50->screen->base.push_mutex);
    switch (q->type) {
    case PIPE_QUERY_OCCLUSION_COUNTER:
    case PIPE_QUERY_OCCLUSION_PREDICATE:
@@ -264,6 +270,7 @@
       assert(0);
       break;
    }
+   pipe_mutex_unlock(nv50->screen->base.push_mutex);
    if (hq->is64bit)
       nouveau_fence_ref(nv50->screen->base.fence.current, &hq->fence);
 }
@@ -286,16 +293,21 @@
       nv50_hw_query_update(q);
 
    if (hq->state != NV50_HW_QUERY_STATE_READY) {
+      pipe_mutex_lock(nv50->screen->base.push_mutex);
       if (!wait) {
          /* for broken apps that spin on GL_QUERY_RESULT_AVAILABLE */
          if (hq->state != NV50_HW_QUERY_STATE_FLUSHED) {
             hq->state = NV50_HW_QUERY_STATE_FLUSHED;
             PUSH_KICK(nv50->base.pushbuf);
          }
+         pipe_mutex_unlock(nv50->screen->base.push_mutex);
          return false;
       }
-      if (nouveau_bo_wait(hq->bo, NOUVEAU_BO_RD, nv50->screen->base.client))
+      if (nouveau_bo_wait(hq->bo, NOUVEAU_BO_RD, nv50->screen->base.client)) {
+         pipe_mutex_unlock(nv50->screen->base.push_mutex);
          return false;
+      }
+      pipe_mutex_unlock(nv50->screen->base.push_mutex);
    }
    hq->state = NV50_HW_QUERY_STATE_READY;
 
--- a/src/gallium/drivers/nouveau/nv50/nv50_query_hw_sm.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_query_hw_sm.c
@@ -176,6 +176,7 @@
       return false;
    }
 
+   pipe_mutex_lock(screen->base.push_mutex);
    assert(cfg->num_counters <= 4);
    PUSH_SPACE(push, 4 * 4);
 
@@ -208,6 +209,7 @@
       BEGIN_NV04(push, NV50_CP(MP_PM_SET(c)), 1);
       PUSH_DATA (push, 0);
    }
+   pipe_mutex_unlock(screen->base.push_mutex);
    return true;
 }
 
@@ -237,6 +239,7 @@
       screen->pm.prog = prog;
    }
 
+   pipe_mutex_lock(screen->base.push_mutex);
    /* disable all counting */
    PUSH_SPACE(push, 8);
    for (c = 0; c < 4; c++) {
@@ -260,6 +263,7 @@
    PUSH_SPACE(push, 2);
    BEGIN_NV04(push, SUBC_CP(NV50_GRAPH_SERIALIZE), 1);
    PUSH_DATA (push, 0);
+   pipe_mutex_unlock(screen->base.push_mutex);
 
    pipe->bind_compute_state(pipe, screen->pm.prog);
    input[0] = hq->bo->offset + hq->base_offset;
@@ -276,6 +280,7 @@
 
    nouveau_bufctx_reset(nv50->bufctx_cp, NV50_BIND_CP_QUERY);
 
+   pipe_mutex_lock(screen->base.push_mutex);
    /* re-active other counters */
    PUSH_SPACE(push, 8);
    mask = 0;
@@ -302,6 +307,7 @@
                     | cfg->ctr[i].unit | cfg->ctr[i].mode);
       }
    }
+   pipe_mutex_unlock(screen->base.push_mutex);
 }
 
 static inline bool
@@ -343,7 +349,9 @@
 
    cfg = nv50_hw_sm_query_get_cfg(nv50, hq);
 
+   pipe_mutex_lock(nv50->screen->base.push_mutex);
    ret = nv50_hw_sm_query_read_data(count, nv50, wait, hq, cfg, mp_count);
+   pipe_mutex_lock(nv50->screen->base.push_mutex);
    if (!ret)
       return false;
 
--- a/src/gallium/drivers/nouveau/nv50/nv50_surface.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_surface.c
@@ -204,10 +204,13 @@
    bool m2mf;
    unsigned dst_layer = dstz, src_layer = src_box->z;
 
+   pipe_mutex_lock(nv50->screen->base.push_mutex);
+
    if (dst->target == PIPE_BUFFER && src->target == PIPE_BUFFER) {
       nouveau_copy_buffer(&nv50->base,
                           nv04_resource(dst), dstx,
                           nv04_resource(src), src_box->x, src_box->width);
+      pipe_mutex_unlock(nv50->screen->base.push_mutex);
       return;
    }
 
@@ -247,6 +250,7 @@
          else
             srect.base += src_mt->layer_stride;
       }
+      pipe_mutex_unlock(nv50->screen->base.push_mutex);
       return;
    }
 
@@ -270,6 +274,7 @@
          break;
    }
    nouveau_bufctx_reset(nv50->bufctx, NV50_BIND_2D);
+   pipe_mutex_unlock(nv50->screen->base.push_mutex);
 }
 
 static void
@@ -289,14 +294,18 @@
 
    assert(dst->texture->target != PIPE_BUFFER);
 
+   pipe_mutex_lock(nv50->screen->base.push_mutex);
+
    BEGIN_NV04(push, NV50_3D(CLEAR_COLOR(0)), 4);
    PUSH_DATAf(push, color->f[0]);
    PUSH_DATAf(push, color->f[1]);
    PUSH_DATAf(push, color->f[2]);
    PUSH_DATAf(push, color->f[3]);
 
-   if (nouveau_pushbuf_space(push, 64 + sf->depth, 1, 0))
+   if (nouveau_pushbuf_space(push, 64 + sf->depth, 1, 0)) {
+      pipe_mutex_unlock(nv50->screen->base.push_mutex);
       return;
+   }
 
    PUSH_REFN(push, bo, mt->base.domain | NOUVEAU_BO_WR);
 
@@ -358,6 +367,8 @@
       PUSH_DATA (push, nv50->cond_condmode);
    }
 
+   pipe_mutex_unlock(nv50->screen->base.push_mutex);
+
    nv50->dirty_3d |= NV50_NEW_3D_FRAMEBUFFER | NV50_NEW_3D_SCISSOR;
 }
 
@@ -382,6 +393,8 @@
    assert(dst->texture->target != PIPE_BUFFER);
    assert(nouveau_bo_memtype(bo)); /* ZETA cannot be linear */
 
+   pipe_mutex_lock(nv50->screen->base.push_mutex);
+
    if (clear_flags & PIPE_CLEAR_DEPTH) {
       BEGIN_NV04(push, NV50_3D(CLEAR_DEPTH), 1);
       PUSH_DATAf(push, depth);
@@ -394,8 +407,10 @@
       mode |= NV50_3D_CLEAR_BUFFERS_S;
    }
 
-   if (nouveau_pushbuf_space(push, 64 + sf->depth, 1, 0))
+   if (nouveau_pushbuf_space(push, 64 + sf->depth, 1, 0)) {
+      pipe_mutex_unlock(nv50->screen->base.push_mutex);
       return;
+   }
 
    PUSH_REFN(push, bo, mt->base.domain | NOUVEAU_BO_WR);
 
@@ -445,6 +460,8 @@
       BEGIN_NV04(push, NV50_3D(COND_MODE), 1);
       PUSH_DATA (push, nv50->cond_condmode);
    }
+
+   pipe_mutex_unlock(nv50->screen->base.push_mutex);
 
    nv50->dirty_3d |= NV50_NEW_3D_FRAMEBUFFER | NV50_NEW_3D_SCISSOR;
 }
@@ -534,9 +551,12 @@
    unsigned i, j, k;
    uint32_t mode = 0;
 
+   pipe_mutex_lock(nv50->screen->base.push_mutex);
    /* don't need NEW_BLEND, COLOR_MASK doesn't affect CLEAR_BUFFERS */
-   if (!nv50_state_validate_3d(nv50, NV50_NEW_3D_FRAMEBUFFER))
+   if (!nv50_state_validate_3d(nv50, NV50_NEW_3D_FRAMEBUFFER)) {
+      pipe_mutex_unlock(nv50->screen->base.push_mutex);
       return;
+   }
 
    /* We have to clear ALL of the layers, not up to the min number of layers
     * of any attachment. */
@@ -602,6 +622,7 @@
    /* restore the array mode */
    BEGIN_NV04(push, NV50_3D(RT_ARRAY_MODE), 1);
    PUSH_DATA (push, nv50->rt_array_mode);
+   pipe_mutex_unlock(nv50->screen->base.push_mutex);
 }
 
 static void
@@ -729,14 +750,18 @@
 
    assert(size % data_size == 0);
 
+   pipe_mutex_lock(nv50->screen->base.push_mutex);
+
    if (offset & 0xff) {
       unsigned fixup_size = MIN2(size, align(offset, 0x100) - offset);
       assert(fixup_size % data_size == 0);
       nv50_clear_buffer_push(pipe, res, offset, fixup_size, data, data_size);
       offset += fixup_size;
       size -= fixup_size;
-      if (!size)
+      if (!size) {
+         pipe_mutex_unlock(nv50->screen->base.push_mutex);
          return;
+      }
    }
 
    elements = size / data_size;
@@ -752,8 +777,10 @@
    PUSH_DATAf(push, color.f[2]);
    PUSH_DATAf(push, color.f[3]);
 
-   if (nouveau_pushbuf_space(push, 64, 1, 0))
+   if (nouveau_pushbuf_space(push, 64, 1, 0)) {
+      pipe_mutex_unlock(nv50->screen->base.push_mutex);
       return;
+   }
 
    PUSH_REFN(push, buf->bo, buf->domain | NOUVEAU_BO_WR);
 
@@ -807,6 +834,8 @@
       nv50_clear_buffer_push(pipe, res, offset, width * data_size,
                              data, data_size);
    }
+
+   pipe_mutex_unlock(nv50->screen->base.push_mutex);
 
    nv50->dirty_3d |= NV50_NEW_3D_FRAMEBUFFER | NV50_NEW_3D_SCISSOR;
 }
@@ -1724,6 +1753,8 @@
         info->src.box.height != -info->dst.box.height))
       eng3d = true;
 
+   pipe_mutex_lock(nv50->screen->base.push_mutex);
+
    if (nv50->screen->num_occlusion_queries_active) {
       BEGIN_NV04(push, NV50_3D(SAMPLECNT_ENABLE), 1);
       PUSH_DATA (push, 0);
@@ -1738,6 +1769,8 @@
       BEGIN_NV04(push, NV50_3D(SAMPLECNT_ENABLE), 1);
       PUSH_DATA (push, 1);
    }
+
+   pipe_mutex_unlock(nv50->screen->base.push_mutex);
 }
 
 static void
--- a/src/gallium/drivers/nouveau/nv50/nv50_transfer.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_transfer.c
@@ -304,6 +304,7 @@
       unsigned base = tx->rect[0].base;
       unsigned z = tx->rect[0].z;
       unsigned i;
+      pipe_mutex_lock(nv50->screen->base.push_mutex);
       for (i = 0; i < box->depth; ++i) {
          nv50_m2mf_transfer_rect(nv50, &tx->rect[1], &tx->rect[0],
                                  tx->nblocksx, tx->nblocksy);
@@ -313,6 +314,9 @@
             tx->rect[0].base += mt->layer_stride;
          tx->rect[1].base += size;
       }
+      /* Kick these reads out so we don't have to reacquire a lock below */
+      PUSH_KICK(nv50->base.pushbuf);
+      pipe_mutex_unlock(nv50->screen->base.push_mutex);
       tx->rect[0].z = z;
       tx->rect[0].base = base;
       tx->rect[1].base = 0;
@@ -349,6 +353,7 @@
    unsigned i;
 
    if (tx->base.usage & PIPE_TRANSFER_WRITE) {
+      pipe_mutex_lock(nv50->screen->base.push_mutex);
       for (i = 0; i < tx->base.box.depth; ++i) {
          nv50_m2mf_transfer_rect(nv50, &tx->rect[0], &tx->rect[1],
                                  tx->nblocksx, tx->nblocksy);
@@ -362,6 +367,7 @@
       /* Allow the copies above to finish executing before freeing the source */
       nouveau_fence_work(nv50->screen->base.fence.current,
                          nouveau_fence_unref_bo, tx->rect[1].bo);
+      pipe_mutex_unlock(nv50->screen->base.push_mutex);
    } else {
       nouveau_bo_ref(NULL, &tx->rect[1].bo);
    }


--- a/src/gallium/drivers/nouveau/nv50/nv50_vbo.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_vbo.c
@@ -767,6 +767,8 @@
    bool tex_dirty = false;
    int s;
 
+   pipe_mutex_lock(nv50->screen->base.push_mutex);
+
    /* NOTE: caller must ensure that (min_index + index_bias) is >= 0 */
    nv50->vb_elt_first = info->min_index + info->index_bias;
    nv50->vb_elt_limit = info->max_index - info->min_index;
@@ -827,6 +829,7 @@
       nv50_push_vbo(nv50, info);
       push->kick_notify = nv50_default_kick_notify;
       nouveau_pushbuf_bufctx(push, NULL);
+      pipe_mutex_unlock(nv50->screen->base.push_mutex);
       return;
    }
 
@@ -886,4 +889,6 @@
    nv50_release_user_vbufs(nv50);
 
    nouveau_pushbuf_bufctx(push, NULL);
-}
+
+   pipe_mutex_unlock(nv50->screen->base.push_mutex);
+}


--- a/src/gallium/drivers/nouveau/nvc0/nvc0_compute.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_compute.c
@@ -424,13 +424,17 @@
 nvc0_launch_grid(struct pipe_context *pipe, const struct pipe_grid_info *info)
 {
    struct nvc0_context *nvc0 = nvc0_context(pipe);
+   struct nvc0_screen *screen = nvc0->screen;
    struct nouveau_pushbuf *push = nvc0->base.pushbuf;
    struct nvc0_program *cp = nvc0->compprog;
    int ret;
 
+   pipe_mutex_lock(screen->base.push_mutex);
+
    ret = !nvc0_state_validate_cp(nvc0, ~0);
    if (ret) {
       NOUVEAU_ERR("Failed to launch grid !\n");
+      pipe_mutex_unlock(screen->base.push_mutex);
       return;
    }
 
@@ -498,4 +502,6 @@
    nouveau_bufctx_reset(nvc0->bufctx_cp, NVC0_BIND_CP_SUF);
    nvc0->dirty_cp |= NVC0_NEW_CP_SURFACES;
    nvc0->images_dirty[5] |= nvc0->images_valid[5];
-}
+
+   pipe_mutex_unlock(screen->base.push_mutex);
+}
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_context.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_context.c
@@ -38,7 +38,9 @@
    if (fence)
       nouveau_fence_ref(screen->fence.current, (struct nouveau_fence **)fence);
 
+   pipe_mutex_lock(screen->push_mutex);
    PUSH_KICK(nvc0->base.pushbuf); /* fencing handled in kick_notify */
+   pipe_mutex_unlock(screen->push_mutex);
 
    nouveau_context_update_frame_stats(&nvc0->base);
 }
@@ -48,8 +50,10 @@
 {
    struct nouveau_pushbuf *push = nvc0_context(pipe)->base.pushbuf;
 
+   pipe_mutex_lock(nvc0_context(pipe)->screen->base.push_mutex);
    IMMED_NVC0(push, NVC0_3D(SERIALIZE), 0);
    IMMED_NVC0(push, NVC0_3D(TEX_CACHE_CTL), 0);
+   pipe_mutex_unlock(nvc0_context(pipe)->screen->base.push_mutex);
 }
 
 static void
@@ -58,6 +62,8 @@
    struct nvc0_context *nvc0 = nvc0_context(pipe);
    struct nouveau_pushbuf *push = nvc0->base.pushbuf;
    int i, s;
+
+   pipe_mutex_lock(nvc0_context(pipe)->screen->base.push_mutex);
 
    if (flags & PIPE_BARRIER_MAPPED_BUFFER) {
       for (i = 0; i < nvc0->num_vtxbufs; ++i) {
@@ -108,6 +114,8 @@
       nvc0->cb_dirty = true;
    if (flags & (PIPE_BARRIER_VERTEX_BUFFER | PIPE_BARRIER_INDEX_BUFFER))
       nvc0->base.vbo_dirty = true;
+
+   pipe_mutex_unlock(nvc0_context(pipe)->screen->base.push_mutex);
 }
 
 static void
@@ -124,6 +132,7 @@
       data_words = string_words;
    else
       data_words = string_words + !!(len & 3);
+   pipe_mutex_lock(nvc0_context(pipe)->screen->base.push_mutex);
    BEGIN_NIC0(push, SUBC_3D(NV04_GRAPH_NOP), data_words);
    if (string_words)
       PUSH_DATAp(push, str, string_words);
@@ -132,6 +141,7 @@
       memcpy(&data, &str[string_words * 4], len & 3);
       PUSH_DATA (push, data);
    }
+   pipe_mutex_unlock(nvc0_context(pipe)->screen->base.push_mutex);
 }
 
 static void
@@ -365,6 +375,8 @@
       return NULL;
    pipe = &nvc0->base.pipe;
 
+   pipe_mutex_lock(screen->base.push_mutex);
+
    if (!nvc0_blitctx_create(nvc0))
       goto out_err;
 
@@ -468,9 +480,12 @@
 
    util_dynarray_init(&nvc0->global_residents);
 
+   pipe_mutex_unlock(screen->base.push_mutex);
+
    return pipe;
 
 out_err:
+   pipe_mutex_unlock(screen->base.push_mutex);
    if (nvc0) {
       if (nvc0->bufctx_3d)
          nouveau_bufctx_del(&nvc0->bufctx_3d);
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_context.h
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_context.h
@@ -299,6 +299,11 @@
                                     uint32_t label);
 void nvc0_program_init_tcp_empty(struct nvc0_context *);
 
+/* nvc0_query.c */
+void nvc0_render_condition(struct pipe_context *pipe,
+                           struct pipe_query *pq,
+                           boolean condition, uint mode);
+
 /* nvc0_shader_state.c */
 void nvc0_vertprog_validate(struct nvc0_context *);
 void nvc0_tctlprog_validate(struct nvc0_context *);
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_query.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_query.c
@@ -92,7 +92,7 @@
                                        index, resource, offset);
 }
 
-static void
+void
 nvc0_render_condition(struct pipe_context *pipe,
                       struct pipe_query *pq,
                       boolean condition, uint mode)
@@ -161,6 +161,16 @@
    PUSH_DATA (push, hq->bo->offset + hq->offset);
 }
 
+static void
+nvc0_render_condition_locked(struct pipe_context *pipe,
+                             struct pipe_query *pq,
+                             boolean condition, uint mode)
+{
+   pipe_mutex_lock(nouveau_context(pipe)->screen->push_mutex);
+   nvc0_render_condition(pipe, pq, condition, mode);
+   pipe_mutex_unlock(nouveau_context(pipe)->screen->push_mutex);
+}
+
 int
 nvc0_screen_get_driver_query_info(struct pipe_screen *pscreen,
                                   unsigned id,
@@ -272,6 +282,6 @@
    pipe->get_query_result = nvc0_get_query_result;
    pipe->get_query_result_resource = nvc0_get_query_result_resource;
    pipe->set_active_query_state = nvc0_set_active_query_state;
-   pipe->render_condition = nvc0_render_condition;
+   pipe->render_condition = nvc0_render_condition_locked;
    nvc0->cond_condmode = NVC0_3D_COND_MODE_ALWAYS;
 }
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw.c
@@ -154,6 +154,7 @@
    }
    hq->sequence++;
 
+   pipe_mutex_lock(nvc0->screen->base.push_mutex);
    switch (q->type) {
    case PIPE_QUERY_OCCLUSION_COUNTER:
    case PIPE_QUERY_OCCLUSION_PREDICATE:
@@ -198,6 +199,7 @@
    default:
       break;
    }
+   pipe_mutex_unlock(nvc0->screen->base.push_mutex);
    hq->state = NVC0_HW_QUERY_STATE_ACTIVE;
    return ret;
 }
@@ -221,6 +223,7 @@
    }
    hq->state = NVC0_HW_QUERY_STATE_ENDED;
 
+   pipe_mutex_lock(nvc0->screen->base.push_mutex);
    switch (q->type) {
    case PIPE_QUERY_OCCLUSION_COUNTER:
    case PIPE_QUERY_OCCLUSION_PREDICATE:
@@ -276,6 +279,7 @@
    default:
       break;
    }
+   pipe_mutex_unlock(nvc0->screen->base.push_mutex);
    if (hq->is64bit)
       nouveau_fence_ref(nvc0->screen->base.fence.current, &hq->fence);
 }
@@ -298,16 +302,21 @@
       nvc0_hw_query_update(nvc0->screen->base.client, q);
 
    if (hq->state != NVC0_HW_QUERY_STATE_READY) {
+      pipe_mutex_lock(nvc0->screen->base.push_mutex);
       if (!wait) {
          if (hq->state != NVC0_HW_QUERY_STATE_FLUSHED) {
             hq->state = NVC0_HW_QUERY_STATE_FLUSHED;
             /* flush for silly apps that spin on GL_QUERY_RESULT_AVAILABLE */
             PUSH_KICK(nvc0->base.pushbuf);
          }
+         pipe_mutex_unlock(nvc0->screen->base.push_mutex);
          return false;
       }
-      if (nouveau_bo_wait(hq->bo, NOUVEAU_BO_RD, nvc0->screen->base.client))
+      if (nouveau_bo_wait(hq->bo, NOUVEAU_BO_RD, nvc0->screen->base.client)) {
+         pipe_mutex_unlock(nvc0->screen->base.push_mutex);
          return false;
+      }
+      pipe_mutex_unlock(nvc0->screen->base.push_mutex);
       NOUVEAU_DRV_STAT(&nvc0->screen->base, query_sync_count, 1);
    }
    hq->state = NVC0_HW_QUERY_STATE_READY;
@@ -374,6 +383,8 @@
 
    assert(!hq->funcs || !hq->funcs->get_query_result);
 
+   pipe_mutex_lock(nvc0->screen->base.push_mutex);
+
    if (index == -1) {
       /* TODO: Use a macro to write the availability of the query */
       if (hq->state != NVC0_HW_QUERY_STATE_READY)
@@ -382,6 +393,7 @@
       nvc0->base.push_cb(&nvc0->base, buf, offset,
                          result_type >= PIPE_QUERY_TYPE_I64 ? 2 : 1,
                          ready);
+      pipe_mutex_unlock(nvc0->screen->base.push_mutex);
       return;
    }
 
@@ -469,6 +481,8 @@
                            4 | NVC0_IB_ENTRY_1_NO_PREFETCH);
    }
 
+   pipe_mutex_unlock(nvc0->screen->base.push_mutex);
+
    if (buf->mm) {
       nouveau_fence_ref(nvc0->screen->base.fence.current, &buf->fence);
       nouveau_fence_ref(nvc0->screen->base.fence.current, &buf->fence_wr);
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_sm.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_sm.c
@@ -1662,6 +1662,7 @@
       return false;
    }
 
+   pipe_mutex_lock(screen->base.push_mutex);
    assert(cfg->num_counters <= 4);
    PUSH_SPACE(push, 4 * 8 * + 6);
 
@@ -1710,6 +1711,7 @@
      BEGIN_NVC0(push, NVE4_CP(MP_PM_SET(c)), 1);
      PUSH_DATA (push, 0);
    }
+   pipe_mutex_unlock(screen->base.push_mutex);
    return true;
 }
 
@@ -1733,6 +1735,7 @@
       return false;
    }
 
+   pipe_mutex_lock(screen->base.push_mutex);
    assert(cfg->num_counters <= 8);
    PUSH_SPACE(push, 8 * 8 + 2);
 
@@ -1779,6 +1782,7 @@
       BEGIN_NVC0(push, NVC0_CP(MP_PM_SET(c)), 1);
       PUSH_DATA (push, 0);
    }
+   pipe_mutex_unlock(screen->base.push_mutex);
    return true;
 }
 
@@ -1866,6 +1870,7 @@
    if (unlikely(!screen->pm.prog))
       screen->pm.prog = nvc0_hw_sm_get_program(screen);
 
+   pipe_mutex_lock(screen->base.push_mutex);
    /* disable all counting */
    PUSH_SPACE(push, 8);
    for (c = 0; c < 8; ++c)
@@ -1893,6 +1898,7 @@
 
    /* upload input data for the compute shader which reads MP counters */
    nvc0_hw_sm_upload_input(nvc0, hq);
+   pipe_mutex_unlock(screen->base.push_mutex);
 
    pipe->bind_compute_state(pipe, screen->pm.prog);
    for (i = 0; i < 3; i++) {
@@ -1906,6 +1912,7 @@
 
    nouveau_bufctx_reset(nvc0->bufctx_cp, NVC0_BIND_CP_QUERY);
 
+   pipe_mutex_lock(screen->base.push_mutex);
    /* re-activate other counters */
    PUSH_SPACE(push, 16);
    mask = 0;
@@ -1930,6 +1937,7 @@
          PUSH_DATA (push, (cfg->ctr[i].func << 4) | cfg->ctr[i].mode);
       }
    }
+   pipe_mutex_unlock(screen->base.push_mutex);
 }
 
 static inline bool
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
@@ -507,7 +507,9 @@
        * _current_ one, and remove both.
        */
       nouveau_fence_ref(screen->base.fence.current, &current);
+      pipe_mutex_lock(screen->base.push_mutex);
       nouveau_fence_wait(current, NULL);
+      pipe_mutex_unlock(screen->base.push_mutex);
       nouveau_fence_ref(NULL, &current);
       nouveau_fence_ref(NULL, &screen->base.fence.current);
    }
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_surface.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_surface.c
@@ -206,11 +206,14 @@
    bool m2mf;
    unsigned dst_layer = dstz, src_layer = src_box->z;
 
+   pipe_mutex_lock(nvc0->screen->base.push_mutex);
+
    if (dst->target == PIPE_BUFFER && src->target == PIPE_BUFFER) {
       nouveau_copy_buffer(&nvc0->base,
                           nv04_resource(dst), dstx,
                           nv04_resource(src), src_box->x, src_box->width);
       NOUVEAU_DRV_STAT(&nvc0->screen->base, buf_copy_bytes, src_box->width);
+      pipe_mutex_unlock(nvc0->screen->base.push_mutex);
       return;
    }
    NOUVEAU_DRV_STAT(&nvc0->screen->base, tex_copy_count, 1);
@@ -251,6 +254,7 @@
          else
             srect.base += src_mt->layer_stride;
       }
+      pipe_mutex_unlock(nvc0->screen->base.push_mutex);
       return;
    }
 
@@ -273,6 +277,7 @@
          break;
    }
    nouveau_bufctx_reset(nvc0->bufctx, 0);
+   pipe_mutex_unlock(nvc0->screen->base.push_mutex);
 }
 
 static void
@@ -291,8 +296,12 @@
 
    assert(dst->texture->target != PIPE_BUFFER);
 
-   if (!PUSH_SPACE(push, 32 + sf->depth))
+   pipe_mutex_lock(nvc0->screen->base.push_mutex);
+
+   if (!PUSH_SPACE(push, 32 + sf->depth)) {
+      pipe_mutex_unlock(nvc0->screen->base.push_mutex);
       return;
+   }
 
    PUSH_REFN (push, res->bo, res->domain | NOUVEAU_BO_WR);
 
@@ -357,6 +366,8 @@
       IMMED_NVC0(push, NVC0_3D(COND_MODE), nvc0->cond_condmode);
 
    nvc0->dirty_3d |= NVC0_NEW_3D_FRAMEBUFFER;
+
+   pipe_mutex_unlock(nvc0->screen->base.push_mutex);
 }
 
 static void
@@ -542,8 +553,11 @@
 
    assert(size % data_size == 0);
 
+   pipe_mutex_lock(nvc0->screen->base.push_mutex);
+
    if (data_size == 12) {
       nvc0_clear_buffer_push(pipe, res, offset, size, data, data_size);
+      pipe_mutex_unlock(nvc0->screen->base.push_mutex);
       return;
    }
 
@@ -553,8 +567,10 @@
       nvc0_clear_buffer_push(pipe, res, offset, fixup_size, data, data_size);
       offset += fixup_size;
       size -= fixup_size;
-      if (!size)
+      if (!size) {
+         pipe_mutex_unlock(nvc0->screen->base.push_mutex);
          return;
+      }
    }
 
    elements = size / data_size;
@@ -564,8 +580,10 @@
       width &= ~0xff;
    assert(width > 0);
 
-   if (!PUSH_SPACE(push, 40))
+   if (!PUSH_SPACE(push, 40)) {
+      pipe_mutex_unlock(nvc0->screen->base.push_mutex);
       return;
+   }
 
    PUSH_REFN (push, buf->bo, buf->domain | NOUVEAU_BO_WR);
 
@@ -613,6 +631,8 @@
    }
 
    nvc0->dirty_3d |= NVC0_NEW_3D_FRAMEBUFFER;
+
+   pipe_mutex_unlock(nvc0->screen->base.push_mutex);
 }
 
 static void
@@ -635,8 +655,11 @@
 
    assert(dst->texture->target != PIPE_BUFFER);
 
-   if (!PUSH_SPACE(push, 32 + sf->depth))
+   pipe_mutex_lock(nvc0->screen->base.push_mutex);
+   if (!PUSH_SPACE(push, 32 + sf->depth)) {
+      pipe_mutex_unlock(nvc0->screen->base.push_mutex);
       return;
+   }
 
    PUSH_REFN (push, mt->base.bo, mt->base.domain | NOUVEAU_BO_WR);
 
@@ -685,6 +708,8 @@
       IMMED_NVC0(push, NVC0_3D(COND_MODE), nvc0->cond_condmode);
 
    nvc0->dirty_3d |= NVC0_NEW_3D_FRAMEBUFFER;
+
+   pipe_mutex_unlock(nvc0->screen->base.push_mutex);
 }
 
 void
@@ -698,9 +723,13 @@
    unsigned i, j, k;
    uint32_t mode = 0;
 
+   pipe_mutex_lock(nvc0->screen->base.push_mutex);
+
    /* don't need NEW_BLEND, COLOR_MASK doesn't affect CLEAR_BUFFERS */
-   if (!nvc0_state_validate_3d(nvc0, NVC0_NEW_3D_FRAMEBUFFER))
+   if (!nvc0_state_validate_3d(nvc0, NVC0_NEW_3D_FRAMEBUFFER)) {
+      pipe_mutex_unlock(nvc0->screen->base.push_mutex);
       return;
+   }
 
    if (buffers & PIPE_CLEAR_COLOR && fb->nr_cbufs) {
       BEGIN_NVC0(push, NVC0_3D(CLEAR_COLOR(0)), 4);
@@ -759,6 +788,8 @@
                     (j << NVC0_3D_CLEAR_BUFFERS_LAYER__SHIFT));
       }
    }
+
+   pipe_mutex_unlock(nvc0->screen->base.push_mutex);
 }
 
 
@@ -1163,8 +1194,8 @@
    nvc0->samplers_dirty[4] |= 3;
 
    if (nvc0->cond_query && !blit->render_condition_enable)
-      nvc0->base.pipe.render_condition(&nvc0->base.pipe, nvc0->cond_query,
-                                       nvc0->cond_cond, nvc0->cond_mode);
+      nvc0_render_condition(&nvc0->base.pipe, nvc0->cond_query,
+                            nvc0->cond_cond, nvc0->cond_mode);
 
    nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_3D_VTX_TMP);
    nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_3D_FB);
@@ -1626,6 +1657,8 @@
    if (info->num_window_rectangles > 0 || info->window_rectangle_include)
       eng3d = true;
 
+   pipe_mutex_lock(nvc0->screen->base.push_mutex);
+
    if (nvc0->screen->num_occlusion_queries_active)
       IMMED_NVC0(push, NVC0_3D(SAMPLECNT_ENABLE), 0);
 
@@ -1636,6 +1669,8 @@
 
    if (nvc0->screen->num_occlusion_queries_active)
       IMMED_NVC0(push, NVC0_3D(SAMPLECNT_ENABLE), 1);
+
+   pipe_mutex_unlock(nvc0->screen->base.push_mutex);
 
    NOUVEAU_DRV_STAT(&nvc0->screen->base, tex_blit_count, 1);
 }
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_transfer.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_transfer.c
@@ -342,16 +342,18 @@
    return !mt->base.fence_wr || nouveau_fence_wait(mt->base.fence_wr, &nvc0->base.debug);
 }
 
-void *
-nvc0_miptree_transfer_map(struct pipe_context *pctx,
-                          struct pipe_resource *res,
-                          unsigned level,
-                          unsigned usage,
-                          const struct pipe_box *box,
-                          struct pipe_transfer **ptransfer)
+static void *
+nvc0_miptree_transfer_map_unlocked(
+      struct pipe_context *pctx,
+      struct pipe_resource *res,
+      unsigned level,
+      unsigned usage,
+      const struct pipe_box *box,
+      struct pipe_transfer **ptransfer)
 {
    struct nvc0_context *nvc0 = nvc0_context(pctx);
-   struct nouveau_device *dev = nvc0->screen->base.device;
+   struct nvc0_screen *screen = nvc0->screen;
+   struct nouveau_device *dev = screen->base.device;
    struct nv50_miptree *mt = nv50_miptree(res);
    struct nvc0_transfer *tx;
    uint32_t size;
@@ -465,9 +467,29 @@
    return tx->rect[1].bo->map;
 }
 
-void
-nvc0_miptree_transfer_unmap(struct pipe_context *pctx,
-                            struct pipe_transfer *transfer)
+void *
+nvc0_miptree_transfer_map(
+      struct pipe_context *pctx,
+      struct pipe_resource *res,
+      unsigned level,
+      unsigned usage,
+      const struct pipe_box *box,
+      struct pipe_transfer **ptransfer)
+{
+   struct nvc0_context *nvc0 = nvc0_context(pctx);
+   struct nvc0_screen *screen = nvc0->screen;
+
+   pipe_mutex_lock(screen->base.push_mutex);
+   void *ret = nvc0_miptree_transfer_map_unlocked(
+         pctx, res, level, usage, box, ptransfer);
+   pipe_mutex_unlock(screen->base.push_mutex);
+
+   return ret;
+}
+
+static void
+nvc0_miptree_transfer_unmap_unlocked(struct pipe_context *pctx,
+                                     struct pipe_transfer *transfer)
 {
    struct nvc0_context *nvc0 = nvc0_context(pctx);
    struct nvc0_transfer *tx = (struct nvc0_transfer *)transfer;
@@ -507,6 +529,18 @@
    FREE(tx);
 }
 
+void
+nvc0_miptree_transfer_unmap(struct pipe_context *pctx,
+                            struct pipe_transfer *transfer)
+{
+   struct nvc0_context *nvc0 = nvc0_context(pctx);
+   struct nvc0_screen *screen = nvc0->screen;
+
+   pipe_mutex_lock(screen->base.push_mutex);
+   nvc0_miptree_transfer_unmap_unlocked(pctx, transfer);
+   pipe_mutex_unlock(screen->base.push_mutex);
+}
+
 /* This happens rather often with DTD9/st. */
 static void
 nvc0_cb_push(struct nouveau_context *nv,


--- a/src/gallium/drivers/nouveau/nvc0/nvc0_vbo.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_vbo.c
@@ -940,6 +940,8 @@
    struct nvc0_screen *screen = nvc0->screen;
    int s;
 
+   pipe_mutex_lock(screen->base.push_mutex);
+
    /* NOTE: caller must ensure that (min_index + index_bias) is >= 0 */
    nvc0->vb_elt_first = info->min_index + info->index_bias;
    nvc0->vb_elt_limit = info->max_index - info->min_index;
@@ -1033,6 +1035,7 @@
       nvc0_push_vbo(nvc0, info);
       push->kick_notify = nvc0_default_kick_notify;
       nouveau_pushbuf_bufctx(push, NULL);
+      pipe_mutex_unlock(screen->base.push_mutex);
       return;
    }
 
@@ -1085,4 +1088,5 @@
    nvc0_release_user_vbufs(nvc0);
 
    nouveau_pushbuf_bufctx(push, NULL);
-}
+   pipe_mutex_unlock(screen->base.push_mutex);
+}
--- a/src/gallium/drivers/nouveau/nvc0/nve4_compute.c
+++ b/src/gallium/drivers/nouveau/nvc0/nve4_compute.c
@@ -604,11 +604,14 @@
 nve4_launch_grid(struct pipe_context *pipe, const struct pipe_grid_info *info)
 {
    struct nvc0_context *nvc0 = nvc0_context(pipe);
+   struct nvc0_screen *screen = nvc0->screen;
    struct nouveau_pushbuf *push = nvc0->base.pushbuf;
    struct nve4_cp_launch_desc *desc;
    uint64_t desc_gpuaddr;
    struct nouveau_bo *desc_bo;
    int ret;
+
+   pipe_mutex_lock(screen->base.push_mutex);
 
    desc = nve4_compute_alloc_launch_desc(&nvc0->base, &desc_bo, &desc_gpuaddr);
    if (!desc) {
@@ -690,6 +693,7 @@
       NOUVEAU_ERR("Failed to launch grid !\n");
    nouveau_scratch_done(&nvc0->base);
    nouveau_bufctx_reset(nvc0->bufctx_cp, NVC0_BIND_CP_DESC);
+   pipe_mutex_unlock(screen->base.push_mutex);
 }
 
 
